library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
Descargar datos: datos
eukaryotes<-read_csv("data/eukaryotes.csv", col_names = T)
## Parsed with column specification:
## cols(
## `#Organism Name` = col_character(),
## `Organism Groups` = col_character(),
## Strain = col_character(),
## BioSample = col_character(),
## BioProject = col_character(),
## Assembly = col_character(),
## Level = col_character(),
## `Size(Mb)` = col_double(),
## `GC%` = col_double(),
## Replicons = col_character(),
## WGS = col_character(),
## Scaffolds = col_double(),
## CDS = col_double(),
## `Release Date` = col_datetime(format = ""),
## `GenBank FTP` = col_character(),
## `RefSeq FTP` = col_character()
## )
eukaryotes %>% glimpse()
## Rows: 5,131
## Columns: 16
## $ `#Organism Name` <chr> "Emiliania huxleyi CCMP1516", "Arabidopsis thaliana…
## $ `Organism Groups` <chr> "Eukaryota;Protists;Other Protists", "Eukaryota;Pla…
## $ Strain <chr> "CCMP1516", NA, "A17", NA, NA, NA, NA, NA, NA, "B80…
## $ BioSample <chr> "SAMN02744062", "SAMN03081427", "SAMN02299339", "SA…
## $ BioProject <chr> "PRJNA77753", "PRJNA10719", "PRJNA10791", "PRJNA198…
## $ Assembly <chr> "GCA_000372725.1", "GCA_000001735.1", "GCA_00021949…
## $ Level <chr> "Scaffold", "Chromosome", "Chromosome", "Chromosome…
## $ `Size(Mb)` <dbl> 167.67600, 119.66800, 412.92400, 978.97200, 823.786…
## $ `GC%` <dbl> 64.5000, 36.0528, 34.0470, 35.1208, 35.7097, 0.0000…
## $ Replicons <chr> NA, "chromosome 1:NC_003070.9/CP002684.1; chromosom…
## $ WGS <chr> "AHAL01", NA, "APNO01", "ACUP02", "AEKE02", "FJWB02…
## $ Scaffolds <dbl> 7795, 7, 2187, 1191, 3224, 72295, 58, 279439, 598, …
## $ CDS <dbl> 38554, 48350, 57661, 71525, 36010, 0, 41070, 0, 584…
## $ `Release Date` <dttm> 2013-04-19, 2001-08-13, 2011-08-12, 2010-01-05, 20…
## $ `GenBank FTP` <chr> "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/372…
## $ `RefSeq FTP` <chr> "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/372…
eukaryotes %>% select(c("#Organism Name","Organism Groups","Strain","Size(Mb)","GC%","CDS")) -> eukaryotes_subset
eukaryotes_subset %>% head()
## # A tibble: 6 x 6
## `#Organism Name` `Organism Groups` Strain `Size(Mb)` `GC%` CDS
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania huxleyi CCM… Eukaryota;Protists;Othe… CCMP15… 168. 64.5 38554
## 2 Arabidopsis thaliana Eukaryota;Plants;Land P… <NA> 120. 36.1 48350
## 3 Medicago truncatula Eukaryota;Plants;Land P… A17 413. 34.0 57661
## 4 Glycine max Eukaryota;Plants;Land P… <NA> 979. 35.1 71525
## 5 Solanum lycopersicum Eukaryota;Plants;Land P… <NA> 824. 35.7 36010
## 6 Hordeum vulgare Eukaryota;Plants;Land P… <NA> 9789. 0 0
eukaryotes_subset %>% rename(organism="#Organism Name",groups="Organism Groups",genome_size_mb="Size(Mb)",gc_percent="GC%") %>% rename_with(tolower) -> eukaryotes_subset
eukaryotes_subset %>% head()
## # A tibble: 6 x 6
## organism groups strain genome_size_mb gc_percent cds
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania huxleyi… Eukaryota;Protists… CCMP15… 168. 64.5 38554
## 2 Arabidopsis thali… Eukaryota;Plants;L… <NA> 120. 36.1 48350
## 3 Medicago truncatu… Eukaryota;Plants;L… A17 413. 34.0 57661
## 4 Glycine max Eukaryota;Plants;L… <NA> 979. 35.1 71525
## 5 Solanum lycopersi… Eukaryota;Plants;L… <NA> 824. 35.7 36010
## 6 Hordeum vulgare Eukaryota;Plants;L… <NA> 9789. 0 0
eukaryotes_subset %>% separate(groups,c("domain","kingdom","class"),sep = ";") -> eukaryotes_subset
eukaryotes_subset %>% head()
## # A tibble: 6 x 8
## organism domain kingdom class strain genome_size_mb gc_percent cds
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania hux… Eukary… Protists Other … CCMP1… 168. 64.5 38554
## 2 Arabidopsis t… Eukary… Plants Land P… <NA> 120. 36.1 48350
## 3 Medicago trun… Eukary… Plants Land P… A17 413. 34.0 57661
## 4 Glycine max Eukary… Plants Land P… <NA> 979. 35.1 71525
## 5 Solanum lycop… Eukary… Plants Land P… <NA> 824. 35.7 36010
## 6 Hordeum vulga… Eukary… Plants Land P… <NA> 9789. 0 0
eukaryotes_subset %>% count(kingdom) %>% arrange(desc(n))
## # A tibble: 5 x 2
## kingdom n
## <chr> <int>
## 1 Fungi 2844
## 2 Animals 1250
## 3 Protists 539
## 4 Plants 478
## 5 Other 20
eukaryotes_subset %>% ggplot(aes(x=kingdom)) + geom_histogram(stat = "count")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
eukaryotes_subset %>% group_by(kingdom) %>% count() %>% ggplot(aes(x=kingdom,y=n)) +
geom_col()
#reordenar
#Colores (fill, scale_fill_brewer(palette = "Set1"))
#Añadir capas (p.ej. coord_flip())
#Transparencia
#etiquetas de los ejes, labs(), (símbolos, ej.: expression(paste("Etiqueta ",mu)))
# Modificar tema: theme_cowplot()
eukaryotes_subset %>% group_by(kingdom) %>% summarise(mean=mean(genome_size_mb),sd=sd(genome_size_mb),max=max(genome_size_mb),min=min(genome_size_mb))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 5
## kingdom mean sd max min
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Animals 1132. 1434. 32394. 0.663
## 2 Fungi 29.9 20.2 216. 2.08
## 3 Other 108. 129. 543. 12.1
## 4 Plants 1023. 2738. 27603. 0.986
## 5 Protists 46.7 57.1 808. 1.45
eukaryotes_subset %>% ggplot(.,aes(x=kingdom,y=genome_size_mb)) +
geom_point()
eukaryotes_subset %>% ggplot(.,aes(x=kingdom,y=genome_size_mb)) +
geom_point() + geom_boxplot()
eukaryotes_subset %>% ggplot(.,aes(x=kingdom,y=genome_size_mb)) + geom_violin()
# Zoom, Fungi
# Ejemplo de etiquetas para el eje y: scale_y_continuous(breaks = seq(0,250,by=30))
#Añadir observaciones + geom_jitter(width=0.15, alpha=0.3)
eukaryotes_subset %>% filter(genome_size_mb >0 & cds > 0) %>% ggplot(.,aes(x=cds,y=genome_size_mb,color=kingdom, size=cds)) +
geom_point(alpha=0.5) -> p
p
### Hacer versión interactiva
#library(plotly)
#library(cowplot)
options(scipen = 999) #Opción para inhabilitar el uso de notación científica (se usará para mostrar los valores decimales en las etiquetas)
eukaryotes_subset %>% ggplot(.,aes(x=kingdom,y=genome_size_mb,color=kingdom)) +
geom_point(alpha=0.7) + geom_boxplot() -> p
ggplotly(p)
# Simplificar etiquetas: mutate(genome_size_mb=round(genome_size_mb,2))
# Añadir etiquetas a la estética general: text= paste("organism:",organism,"\ncds:",cds)
#Añadir tema: +theme_cowplot()
# Elegir campos para presentar en los recuadros: tooltip = c("text","x","y")
# Modificar menú de gráfica Plotly %>% config(displayModeBar = F)
# Modificar fondo de la etiqueta: %>% layout(hoverlabel=list(bgcolor="white"))